getwd()
setwd("C:/Users/Yunfei/Documents/Spring2015/G2001/Paperwork2nd")
if (!require("PropCIs"))   {install.packages("PropCIs");require(PropCIs)}
if (!require("xtable"))    {install.packages("xtable");require(xtable)}
if (!require("MatchIt"))   {install.packages("MatchIt");require(MatchIt)}
if (!require("cem"))       {install.packages("cem");require(cem)}
if (!require("Zelig"))     {install.packages("Zelig");require(Zelig)}
if (!require("MatchingFrontier"))
                           {install.packages("MatchingFrontier");require(MatchingFrontier)}

data<-read.csv("women_all_update.csv")
colnames(data)
head(data)
table(data$year)
table(data$year,data$births)
data.births<-data[which(data$births==1),]
table(data.births$year,data.births$kigali)
table(data.births$year,data.births$anc)

#Table 1 Individuals included in the analyses of maternal care in the regions excluding Kigali City with RDHS, from year 2005 to year 2010
Table1<-matrix(0,4,3)
colnames(Table1)<-c("RDHS2005","RDHS2008","RDHS2010")
rownames(Table1)<-c("Total number of interviewed women","Number of women included in maternal care analyses",
                    "Number of women included in maternal care analyses in regions excluding Kigali City",
                    "Number of women included in maternal care analyses with ANC")
Table1[1,]<-table(data$year)
Table1[2,]<-table(data$year,data$births)[,2]
Table1[3,]<-table(data.births$year,data.births$kigali)[,1]
Table1[4,]<-table(data.births$year,data.births$anc)[,2]
Table1
write.csv(Table1,file="Table1.csv") 
N.full<-sum(Table1[2,])
N.full                    #1913
N.subgroup.nonKigali<-sum(Table1[3,])
N.subgroup.nonKigali  #1794
N.subgroup.anc<-sum(Table1[4,])
N.subgroup.anc  #1850
##Table 1 is same for all three analyses groups: full participants, participants only from non-Kigali region, particpants with ANC
##From bellow starting at Table 2-Table 6 is different results, of course.
##Code is same for the three, just update data with different criteria of participants for saving time

#Table2 Checking endogeneity of Mutuelles: mean difference of self-reported illness and birth delivery by Mutuelles status (RDHS)
table2pre<-matrix(0,4,4)
colnames(table2pre)<-c("mutuelles","2005","2008","2010")
rownames(table2pre)<-rep(c("no births","births"),2)
table2pre[,1]<-c(0,0,1,1)
table2pre[1:2,2:4]<-table(data$births, data$year,data$mutuelles)[,,1] #Put the largest categories on the end instead of at the beginning
table2pre[3:4,2:4]<-table(data$births, data$year,data$mutuelles)[,,2] #Put the largest categories on the end instead of at the beginning
table2pre
Table2<-matrix(0,9,3)
colnames(Table2)<-c("mean","CIlw","CIup")
rownames(Table2)<-c("RDHS2005","No insurance","Mutuelles","RDHS2008","No insurance","Mutuelles","RDHS2010","No insurance","Mutuelles")

for (i in 2:4) {
  for (j in 1:2) {
  Table2[(i-1)*3+(j-2),1]<-add4ci(table2pre[j*2,i],sum(table2pre[(j*2-1):(j*2),i]),conf.level=0.95)$estimate
  Table2[(i-1)*3+(j-2),2]<-add4ci(table2pre[j*2,i],sum(table2pre[(j*2-1):(j*2),i]),conf.level=0.95)$conf.int[1] 
  Table2[(i-1)*3+(j-2),3]<-add4ci(table2pre[j*2,i],sum(table2pre[(j*2-1):(j*2),i]),conf.level=0.95)$conf.int[2] 
}
}
Table2
write.csv(Table2,file="Table2.csv")


# mutuelles, HH age, HH sex, woman's age, woman's education, rural, wealth (categorical), radio
#HH education, year

#Table 3 Descriptive statistics for variables used in analyzing utilization of skilled-birth attendance (pooled RDHS2005,2008, and 2010)
N<-table(data$births)[2]
N #N=1913
data_births<-data[which(data$births==1),]
Table3<-matrix(0,16,3)
colnames(Table3)<-c("Mean","CIlw","CIup")
rownames(Table3)<-c("DepVar Skilled birth attendance","InpVar Mutuelles","Head:age","Head:female","Woman's age","Woman's schooling","Rural residence",
                    "1st Wealth quintile","2nd Wealth quintile","3rd Weatth quintile","4th Wealth quintile","5th Wealth quintile","Radio ownership","Year 2005","Year 2008","Year2010")

tb3output<-function (list){
  tb3output<-c(list$estimate,list$conf[1],list$conf[2])
  return(tb3output=tb3output)
}
Table3[1,]<-tb3output(prop.test(table(data_births$sba==0))) #Note that prop.test keep the first coloumn as numeriator, so when set sba==0, the one with our original interest (sba==1) one is 
                                     #re-arranged as the first column, giving us the correct estimates
                                     #This rule is applicable for the following estimates, especially for wealth quintile 
Table3[2,]<-tb3output(prop.test(table(data_births$mutuelles==0)))
Table3[3,]<-tb3output(t.test(data_births$age_hhhead))
Table3[4,]<-tb3output(prop.test(table(data_births$sex_hhhead==1))) #female hhhead==2
Table3[5,]<-tb3output(t.test(data_births$age))
Table3[6,]<-tb3output(prop.test(table(data_births$educ==0)))
Table3[7,]<-tb3output(prop.test(table(data_births$rural==0)))
Table3[8,]<-tb3output(prop.test(table(data_births$wealth!=1)))
Table3[9,]<-tb3output(prop.test(table(data_births$wealth!=2)))
Table3[10,]<-tb3output(prop.test(table(data_births$wealth!=3)))
Table3[11,]<-tb3output(prop.test(table(data_births$wealth!=4)))
Table3[12,]<-tb3output(prop.test(table(data_births$wealth!=5)))
Table3[13,]<-tb3output(prop.test(table(data_births$radio==0)))
Table3[14,]<-tb3output(prop.test(table(data_births$year!=2005)))
Table3[15,]<-tb3output(prop.test(table(data_births$year!=2008)))
Table3[16,]<-tb3output(prop.test(table(data_births$year!=2010)))
Table3
write.csv(Table3,file="Table3.csv")

#Table 4 Self-reported skilled birth attendance utilization when havin births by Mutuelles status
table4pre<-matrix(0,4,4)
colnames(table4pre)<-c("mutuelles","2005","2008","2010")
rownames(table4pre)<-rep(c("no SBA","SBA"),2)
table4pre[,1]<-c(0,0,1,1)
table4pre[1:2,2:4]<-table(data_births$sba, data_births$year,data_births$mutuelles)[,,1] #Put the largest categories on the end instead of at the beginning
table4pre[3:4,2:4]<-table(data_births$sba, data_births$year,data_births$mutuelles)[,,2] #Put the largest categories on the end instead of at the beginning
table4pre
Table4<-matrix(0,9,3)
colnames(Table4)<-c("Mean","CIlw","CIup")
rownames(Table4)<-c("RDHS2005","No insurance","Mutuelles","RDHS2008","No insurance","Mutuelles","RDHS2010","No insurance","Mutuelles")

for (i in 2:4) {
  for (j in 1:2) {
    Table4[(i-1)*3+(j-2),1]<-add4ci(table4pre[j*2,i],sum(table4pre[(j*2-1):(j*2),i]),conf.level=0.95)$estimate
    Table4[(i-1)*3+(j-2),2]<-add4ci(table4pre[j*2,i],sum(table4pre[(j*2-1):(j*2),i]),conf.level=0.95)$conf.int[1] 
    Table4[(i-1)*3+(j-2),3]<-add4ci(table4pre[j*2,i],sum(table4pre[(j*2-1):(j*2),i]),conf.level=0.95)$conf.int[2] 
  }
}
Table4
write.csv(Table4,file="Table4.csv")

#Table5 Logistic regression results for skilled-birth attendance utilization

#Naive calculation of the average treatment effect:
summary(data_births$sba[data_births$mutuelles==1])["Mean"]-summary(data_births$sba[data_births$mutuelles==0])["Mean"]
#0.3272 
exp(0.3272) #OR=1.387
data_births$yearl<-as.factor(data_births$year)
data_births$wealthl<-as.factor(data_births$wealth)
data_births$wealthl<-relevel(data_births$wealthl,ref="5")
data_births$agecat_hhheadl<-as.factor(data_births$agecat_hhhead)
#Unmatched regression
unmatched <- glm(sba ~ mutuelles + yearl + agecat_hhheadl + sex_hhhead + age + educ + rural + wealthl + radio, data = data_births, family="binomial")
table5pre<-xtable(unmatched)
table5OR<-exp(cbind(OR = coef(unmatched), confint(unmatched)))
Table5_unmatched<-cbind(table5OR[,1],table5pre[,c(2,4)])
Table5_unmatched  #N=1913
write.csv(Table5_unmatched,file="Table5_unmatched.csv")

##NOTE I created another glm treating household head age as continous same as women's age, rather than as categorized variables
unmatched.b<-glm(sba ~ mutuelles + yearl + age_hhhead + sex_hhhead + age + educ + rural + wealthl + radio, data = data_births, family="binomial")
table5pre.b<-xtable(unmatched.b)
table5OR.b<-exp(cbind(OR=coef(unmatched.b),confint(unmatched.b)))
Table5_unmatched.b<-cbind(table5OR.b[,1],table5pre.b[,c(2,4)])
Table5_unmatched.b
write.csv(Table5_unmatched.b,file="Table5_unmatched.b.csv")
#Table 6. ATT of mutuelles on skilled-birth attendance utilization with CEM
#CEM matching on HH edu, HH sex, rural, district, woman's education, woman's age, wealth, year 
colnames(data_births)
colnames(data_births)[c(3,4,5,7,8,12,14,16,19,20)]
data_births_update<-data_births[,c(3,4,5,7,8,12,14,16,19,20)]  
#data_births_update<-data_births[,c(2,3,4,5,6,7,8,12,14,16)]
                        #Treat wealth and year as categorical variables  
                        #Instead of as continous variables 
                        #And Keep Only interested variables for CEM
#data_births_nona_update<-data_births_update[complete.cases(data_births),] #Delete rows with any NA values 
data_births_nona_update<-data.frame(na.omit(data_births_update))
dim(data_births_nona_update) #N_CEM=1822
#wealthg<-ordered(data_births_nona_update$wealthl)
levels(data_births_nona_update$wealthl)
wealth.grp<-list("5","1","2","3","4")
levels(data_births_nona_update$yearl)
year.grp<-list("2005","2008","2010")
order.group<-list(wealth.grp,year.grp)
order.group
       #Take year and wealth as categorical veriables, for CEM matching grouping options

auto.match <- matchit(formula = mutuelles ~ yearl + age_hhhead+ sex_hhhead + age + educ
                      +rural + wealthl + radio, data = data_births_nona_update , method = "cem",group=order.group)
auto.match

summary(data_births_nona_update$age)
summary(data_births_nona_update$age_hhhead)
agecut <- seq(15,47,5)
agecut
agecut_hhhead<- seq(16,85,5)
agecut_hhhead
my.cutpoints <- list(age=agecut, age_hhhead=agecut_hhhead)
user.match <- matchit(formula = mutuelles ~ yearl + age_hhhead+ sex_hhhead + age + educ
                      +rural + wealthl + radio, data = data_births_nona_update , 
                     method = "cem",cutpoints=my.cutpoints,grouping=order.group)
user.match
auto.data<-match.data(auto.match)
user.data<-match.data(user.match)
colnames(auto.data)
auto.imb <- imbalance(group=auto.data$mutuelles,
                      data=auto.data, 
                      drop=c("mutuelles","sba","distance","weights","subclass"))

user.imb <- imbalance(group=user.data$mutuelles,
                      data=user.data, 
                      drop=c("mutuelles","sba","distance","weights","subclass"))

auto.imb
user.imb

summary(auto.match)$nn  #215 262
summary(user.match)$nn  #242 314
auto.imb$L1  #30.6
user.imb$L1  #31.2

#user.match has higher LCS(31.2%) compared to auto.match with LCS(30.6%)
cem.match <- cem(treatment = "mutuelles",
                 data = data_births_nona_update, 
                 drop = "sba",cutpoints=my.cutpoints,grouping=order.group)
cem.match.att <- att(obj=cem.match, formula= sba~ mutuelles,
                     
                     data = data_births_nona_update, model="linear")
cem.match.att #0.143775 p=0.000680 OR=1.15
cem.match2 <- cem(treatment = "mutuelles",
                               data = data_births_nona_update, 
                               drop = "sba",cutpoints=my.cutpoints,grouping=order.group)

cem.match.att2 <- att(obj=cem.match2, formula=sba ~ mutuelles + yearl + age_hhhead+ sex_hhhead + age + educ
                      +rural + wealthl + radio,
                      data = data_births_nona_update, model="linear")
cem.match.att2
#0.146769, p=0.000129, OR=1.158086 [0.072188, 0.221350]
######################
## Frontier
## At this time from Gary's R pacakge statement, we cannot firgure out how 
## to draw frontier with categorical variables, so we take wealth and year as continous for 
## graphs of frontier
data_births_update.c<-data_births[,c(2,3,4,5,6,7,8,12,14,16)]
data_births_nona_update.c<-data.frame(na.omit(data_births_update.c))
dim(data_births_nona_update.c) #N_CEM=1822

match.variables <- names(data_births_nona_update.c)[!names(data_births_nona_update.c) %in% c("mutuelles","sba")]

our.frontier <- makeFrontier(dataset = data_births_nona_update.c,
                             treatment = 'mutuelles',
                             outcome = 'sba',
                             match.on = match.variables)
pdf("frontier.pdf")
plotFrontier(our.frontier,
             cex.lab = 1.4,
             cex.axis = 1.4,
             type = "l")
graphics.off()
my.form = as.formula(sba ~ mutuelles + age_hhhead+ sex_hhhead + age + educ
                     +rural + radio + wealth + year)
our.estimates <- 
  estimateEffects(our.frontier,
                  formula = 'sba ~ mutuelles',
                  mod.dependence.formula = my.form,
                  continuous.vars = c("age","age_hhhead"))
pdf("frontier_est.pdf")
plotEstimates(our.estimates)
graphics.off()

Table6_CEM<-matrix(0,18,3)
colnames(Table6_CEM)<-c("OR","Std.Error","Pr(>|z|)")
rownames(Table6_CEM)<-c("CEM with only mutuelles in formula","Intercept","Mutuelles",
                        "CEM with other variables in formula","Intercept","Mutuelles","year2008","year2010","age_hhhead","sex_hhhead",
                        "age","educ","rural","wealth1","wealth2","wealth3","wealth4","radio")
Table6_CEM[2,]<-c(exp(cem.match.att$att.model[1,1]),cem.match.att$att.model[c(2,4),1])
Table6_CEM[3,]<-c(exp(cem.match.att$att.model[1,2]),cem.match.att$att.model[c(2,4),2])
Table6_CEM[5,]<-c(exp(cem.match.att2$att.model[1,1]),cem.match.att2$att.model[c(2,4),1])
Table6_CEM[6,]<-c(exp(cem.match.att2$att.model[1,2]),cem.match.att2$att.model[c(2,4),2])
Table6_CEM[c(1,4),]<-NA
Table6_CEM[7:18,]<-c(exp(cem.match.att2$att.model[1,3:14]),cem.match.att2$att.model[c(2,4),3:14])
Table6_CEM

write.csv(Table6_CEM,file="Table6_CEM.csv")

